#!/usr/bin/env python
# encoding: utf-8

import requests
import pymysql
import time
from lxml import etree
import re
from datetime import datetime

# Complaint detail page that this script downloads and parses.
base_url = 'http://315qc.com/Home/Carcomplaints/view/id/549764'

# Headers sent with the request; the User-Agent is a desktop Chrome string.
headers = {
	'User-Agent': (
		'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
		'AppleWebKit/537.36 (KHTML, like Gecko) '
		'Chrome/123.0.0.0 Safari/537.36'
	),
}


def get_content(url=None, timeout=10):
	"""Download a complaint page and return its body as text.

	Args:
		url: Address to fetch. When omitted, the module-level ``base_url``
			is used, which matches the original zero-argument behaviour.
		timeout: Seconds to wait for the server before giving up. Without
			a timeout ``requests`` may block indefinitely on a stalled
			connection.

	Returns:
		The response body decoded to ``str`` by ``requests``.

	Raises:
		requests.RequestException: On connection failure, timeout, or an
			HTTP error status (4xx/5xx).
	"""
	target = base_url if url is None else url
	response = requests.get(url=target, headers=headers, timeout=timeout)
	# Fail here rather than handing an error page to the parser.
	response.raise_for_status()
	return response.text


# Patterns that split the "label：value" text of the complaint list items;
# group(2) is the value that follows the label.
_BRAND_PATTERN = re.compile(r'(投诉车型：)(.+)')
_BUG_PATTERN = re.compile(r'(投诉问题：)(.+)')


def _first_or_empty(nodes):
	"""Return the first XPath result, or "" when nothing matched."""
	return nodes[0] if nodes else ""


def _value_after_label(pattern, text):
	"""Return the text that follows the label in *text*, or "" if the label is absent."""
	match = pattern.search(text)
	return match.group(2) if match else ""


def scrapy_content(content1, url):
	"""Parse a complaint page and print the extracted fields.

	Args:
		content1: HTML source of the complaint page.
		url: Address the page was fetched from; printed alongside the
			extracted fields.

	Returns:
		None. The result is written to stdout as a single line containing
		the url, brand, bug, introduction and complaint date.

	Any field whose node or label is not present in the page is reported
	as an empty string instead of raising ``IndexError``/``AttributeError``.
	"""
	tree = etree.HTML(content1)
	# Complaint description text.
	introduct = _first_or_empty(tree.xpath('//div[@class="xwn-ti"]/text()'))
	# Complaint date.
	tousu_date = _first_or_empty(tree.xpath('//div[@class="xwn-gjk"]/div/text()'))
	# Complaint details list. NOTE(review): this is a position-based absolute
	# path, so it presumably depends on the current page layout — confirm.
	ul_list = tree.xpath('/html/body/div[5]/div[1]/div[1]/div[3]/ul')
	_brand, _bug = "", ""
	for ul in ul_list:
		# Vehicle model named in the complaint (third list item).
		brand = _first_or_empty(ul.xpath('./li[3]/text()'))
		_brand = _value_after_label(_BRAND_PATTERN, brand)
		# Reported problem (fourth list item), whitespace-normalised by XPath.
		bug = ul.xpath('normalize-space(./li[4]/text())')
		_bug = _value_after_label(_BUG_PATTERN, bug)
	# Print the URL as given; it was previously emitted reversed (url[::-1]).
	print("url:", url, "brand:", _brand, "bug:", _bug, "introduct:", introduct, "tousu_date:", tousu_date)


if __name__ == '__main__':
	# Download the page, then parse it and print the extracted fields.
	scrapy_content(get_content(), base_url)
